package cn.sheep.dmp.etl




import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Sheep.Old @ 64341393
  * Created 2018/3/28
  */
/**
  * Reads a parquet dataset from the relative path "parquet", registers it as a
  * SQL temp table, and prints the count of log rows grouped by province/city.
  *
  * NOTE(review): despite the app name, this job READS parquet rather than
  * converting logs to parquet — confirm intent against the rest of the project.
  */
object Sql2Parquet {

  def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf().setAppName("日志转parquet文件")
      .setMaster("local[*]") // hard-coded local master: fine for dev, remove for cluster submit
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") // faster RDD serialization

    val sc = new SparkContext(sparkConf)
    val sqlc = new SQLContext(sc)

    // Load parquet files from the working-directory-relative path "parquet".
    val parquet = sqlc.read.parquet("parquet")

    // Expose the DataFrame to SQL under the table name "logs".
    parquet.registerTempTable("logs")

    // BUG FIX: the query result was previously discarded. Spark SQL is lazy,
    // so without an action no job ever ran. Bind the result and call show()
    // to actually execute the aggregation and print it to stdout.
    val result = sqlc.sql(
      "select provincename,cityname,count(1) from logs group by provincename,cityname")
    result.show()

    sc.stop()
  }

}
